/**
 * @name 腾讯 - 视频+财经
 * @appid 1791608
 */

import Creeper, { _ } from '../lib/creeper';

// Entry pages the crawler starts from. The entertainment/movie channel is
// currently disabled, so only the finance channel is scanned.
const seedUrls = [
  // 'https://ent.qq.com/movie/',
  'https://new.qq.com/ch/finance/',
];

// The only extracted field: the article body, located by XPath and marked as required.
const articleBodyField = {
  name: 'content',
  alias: '内容',
  selector: '//div[contains(@class,"article-content")]',
  required: true,
};

// Crawler instance for the Tencent channels.
// NOTE(review): contentUrlRegexes matches www.cbnweek.com article URLs while
// domains/scanUrls are qq.com — looks like a leftover from another crawler;
// confirm whether the framework relies on this pattern for URLs added via site.addUrl.
const app = new Creeper({
  domains: ['ent.qq.com', 'new.qq.com'],
  scanUrls: seedUrls,
  contentUrlRegexes: [/https:\/\/www\.cbnweek\.com\/articles\/\w+\/\d+/],
  fields: [articleBodyField],
});

// Scan-page hook: pages whose URL contains "finance" are handed to getFinance,
// which extracts the embedded article list and queues the article URLs itself.
app.onProcessScanPage((page, content, site) => {
  const isFinancePage = /finance/.test(page.url);

  if (isFinancePage) {
    getFinance(content, site); // finance channel
  }

  // Always returns false; presumably this tells the framework not to
  // auto-discover links on the scan page — TODO confirm against Creeper.
  return false;
});

// Kick off the crawl once the hook is registered.
app.start();


/**
 * Extracts the finance article list embedded in a scan page and queues the
 * fresh articles for crawling.
 *
 * Articles whose `source` is on the ignore list, entries without a URL, and
 * articles that `_.expired` reports as older than 24h are skipped. For every
 * remaining article the URL is added to `site`, and its title, publish time
 * and image are cached in `_.store` under that URL.
 *
 * A page without a payload, or with a payload that cannot be evaluated, is
 * treated as containing zero articles instead of throwing.
 *
 * @param content Raw HTML of the scan page.
 * @param site Crawler site handle used to enqueue article URLs.
 */
function getFinance(content: string, site: Shenjian.Site) {
  const ignore = ['AI财经社', '澎湃新闻', '第一财经', '华尔街见闻', '界面新闻'];

  const list = parseFinanceData(content)
    // Drop null/URL-less entries up front so nothing invalid is queued or cached.
    .filter((it: any) => it && it.url && !ignore.includes(it.source))
    .map((it: any) => ({
      title: it.title,
      time: it.publish_time,
      url: it.url,
      image: it.img,
    }));

  // Count only what is actually queued, so the log below matches reality.
  let queued = 0;
  for (const it of list) {
    // NOTE(review): relies on publish_time being a format `new Date` can parse — confirm.
    if (_.expired(new Date(it.time), '24h')) {
      continue;
    }

    site.addUrl(it.url);
    _.store.set(it.url, {
      title: it.title,
      time: it.time,
      image: _.fixImg(it.image, false),
    });
    queued += 1;
  }

  console.log(`发现 ${queued} 个符合要求的文章，准备爬取...`);
}

/**
 * Pulls the `chData=...</script>` payload out of the page and returns its
 * `data` array, or an empty array when the payload is missing, cannot be
 * evaluated, or does not carry an array under `data`.
 */
function parseFinanceData(content: string): any[] {
  const matched = /chData=(.+?)<\/script>/.exec(content);
  if (!matched) {
    return [];
  }

  try {
    // NOTE(security): this evaluates text taken from a remote page as code.
    // It is kept because the payload may be a JS object literal rather than
    // strict JSON; switch to JSON.parse if the payload is confirmed to be JSON.
    const parsed = Function(`return ${matched[1]}`)();
    return parsed && Array.isArray(parsed.data) ? parsed.data : [];
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`解析 chData 失败: ${reason}`);
    return [];
  }
}
